rm(list=setdiff(ls(), "twd")) 
options(warn=-1)
library(fixest);library(AER);library(stringr);library(knitr)

data<-readRDS(paste(twd,"data/made_data/vreg_wgeos_race.rds",sep=""))


## In person 20
data$in_person_primary_20<-ifelse(data$voting_method_200303 == "P",1,0)
## Abs early 20
data$abs_early_primary_20<-ifelse(data$voting_method_200303 %in% c("A","E"),1,0)
## In person 18
data$in_person_general_18<-ifelse(data$voting_method_181106 == "P",1,0)
## Abs early 18
data$abs_early_general_18<-ifelse(data$voting_method_181106 %in% c("A","E"),1,0)
## Take First Difference
data$diff_in_person<-data$in_person_primary_20-data$in_person_general_18
data$diff_abs_early<-data$abs_early_primary_20-data$abs_early_general_18
### get the minimum of the distance to a consolidated (including super sites)
data$distance_realized_min<-apply(cbind(data$distance_realized,data$distancegov1,data$distancegov2),1,min,na.rm=T)
## Difference between assigned and realized distances (in logs and levels)
data$ln_dist_change<-ifelse(data$moved==1,log(data$distance_realized_min)-log(data$distance_assigned),0)
data$lev_dist_change<-ifelse(data$moved==1,data$distance_realized_min-data$distance_assigned,0)
### Difference between assigned and realized size # of assigned voters (in logs and levels)
data$ln_diff_N<-log(data$realized_N)-log(data$assigned_N)
data$lev_diff_N<-data$realized_N-data$assigned_N

#### Create differences in turnout for all previous elections and run regression ######

## 18-18b

data$in_person_primary_18<-ifelse(data$voting_method_180802== "P",1,0)
data$abs_early_primary_18<-ifelse(data$voting_method_180802 %in% c("A","E"),1,0)

data$diff_in_person_18_18<-data$in_person_general_18-data$in_person_primary_18
data$diff_abs_early_18_18<-data$abs_early_general_18-data$abs_early_primary_18

## 18-16

data$in_person_general_16<-ifelse(data$voting_method_161108== "P",1,0)
data$abs_early_general_16<-ifelse(data$voting_method_161108 %in% c("A","E"),1,0)

data$diff_in_person_18_16<-data$in_person_primary_18-data$in_person_general_16
data$diff_abs_early_18_16<-data$abs_early_primary_18-data$abs_early_general_16

## 16-16a

data$in_person_primary_16a<-ifelse(data$voting_method_160804== "P",1,0)
data$abs_early_primary_16a<-ifelse(data$voting_method_160804 %in% c("A","E"),1,0)

data$diff_in_person_16_16a<-data$in_person_general_16-data$in_person_primary_16a
data$diff_abs_early_16_16a<-data$abs_early_general_16-data$abs_early_primary_16a

## 16a-16b

data$in_person_primary_16b<-ifelse(data$voting_method_160301== "P",1,0)
data$abs_early_primary_16b<-ifelse(data$voting_method_160301 %in% c("A","E"),1,0)


data$diff_in_person_16_16b<-data$in_person_primary_16a-data$in_person_primary_16b
data$diff_abs_early_16_16b<-data$in_person_primary_16a-data$abs_early_primary_16b

## 16b-14

data$in_person_general_14<-ifelse(data$voting_method_141104== "P",1,0)
data$abs_early_general_14<-ifelse(data$voting_method_141104 %in% c("A","E"),1,0)

data$diff_in_person_16_14<-data$in_person_primary_16b-data$in_person_general_14
data$diff_abs_early_16_14<-data$in_person_primary_16b-data$abs_early_general_14

## 14-14a

data$in_person_primary_14<-ifelse(data$voting_method_140807== "P",1,0)
data$abs_early_primary_14<-ifelse(data$voting_method_140807 %in% c("A","E"),1,0)

data$diff_in_person_14_14<-data$in_person_general_14 - data$in_person_primary_14
data$diff_abs_early_14_14<-data$abs_early_general_14 - data$abs_early_primary_14

## 14a-12

data$in_person_general_12<-ifelse(data$voting_method_121106== "P",1,0)
data$abs_early_general_12<-ifelse(data$voting_method_121106 %in% c("A","E"),1,0)

data$diff_in_person_14_12<-data$in_person_primary_14 - data$in_person_general_12
data$diff_abs_early_14_12<-data$abs_early_primary_14 - data$abs_early_general_12


data$med_dist_change<-ifelse(data$ln_dist_change > median(data[data$moved==1,"ln_dist_change"]),1,0)
data$med_N_change<-ifelse(data$ln_diff_N > median(data[data$consolidated==1,"ln_diff_N"]),1,0)



ft1<-feols(diff_in_person~moved+consolidated+ln_dist_change+I(ln_dist_change^2)+ln_diff_N+I(ln_diff_N^2)
           +I(dist_damage_min<500)+I(dist_power_min<500)+diff_abs_early,data=data,cluster~polling_place_text_name)
ft2<-feols(diff_in_person~moved+consolidated+ln_dist_change+I(ln_dist_change^2)+I(ln_dist_change^3)+ln_diff_N+I(ln_diff_N^2)+I(ln_diff_N^3)
           +I(dist_damage_min<500)+I(dist_power_min<500)+diff_abs_early,data=data,cluster~polling_place_text_name)


ft3<-feols(diff_in_person~moved+consolidated+lev_dist_change+I(lev_dist_change^2)+lev_diff_N+I(lev_diff_N^2)
           +I(dist_damage_min<500)+I(dist_power_min<500)+diff_abs_early,data=data,cluster~polling_place_text_name)
ft4<-feols(diff_in_person~moved+consolidated+lev_dist_change+I(lev_dist_change^2)+I(lev_dist_change^3)+lev_diff_N+I(lev_diff_N^2)+I(lev_diff_N^3)
           +I(dist_damage_min<500)+I(dist_power_min<500)+diff_abs_early,data=data,cluster~polling_place_text_name)

#############################################################



pdf(paste(twd,"figs/marginal_effects_cube_levs.pdf",sep=""),12,12)

par(mfrow=c(2,2))


newdata_diff_N<-sort(unique(data$ln_diff_N))
newdata_diff_N<-newdata_diff_N[newdata_diff_N >0]
names(newdata_diff_N)<-c("ln_diff_N")
newdata_1<-data.frame(newdata_diff_N,rep(0, length(newdata_diff_N)), rep(0,length(newdata_diff_N)),rep(1,length(newdata_diff_N)), 
                      rep(0,length(newdata_diff_N)),rep(0,length(newdata_diff_N)),rep(0,length(newdata_diff_N)) )

names(newdata_1)<-c("ln_diff_N","ln_dist_change","moved","consolidated","dist_damage_min","dist_power_min","diff_abs_early")

se_fitt_N2<-predict(ft1,newdata=data.frame(newdata_1),se.fit=T)$se.fit
effect_diff_N2<-predict(ft1,newdata=data.frame(newdata_1)) -coef(ft1)[1]

se_fitt_N3<-predict(ft2,newdata=data.frame(newdata_1),se.fit=T)$se.fit
effect_diff_N3<-predict(ft2,newdata=data.frame(newdata_1)) -coef(ft2)[1]


plot(newdata_diff_N,effect_diff_N2,ylim=c(-.4,.1),xlim=c(.25,3.5),type="n",
     axes=F,xlab="",ylab="")
grid()
par(new=T)
hist(data[data$consolidated==1,]$ln_diff_N,breaks=20,axes=F,xlim=c(.25,3.5),border=F,
     xlab="log(Size|Realized)-log(Size|Assigned)",
     ylab="",main="Effect of Size for Consolidated Voters")

par(new=T)
plot(newdata_diff_N,effect_diff_N2,axes=F,lwd=2,xlab="",ylab="",type="l",
     ylim=c(-.4,.1),xlim=c(.25,3.5)
)
lines(newdata_diff_N,effect_diff_N2+1.96*se_fitt_N2,lwd=2,lty=2)
lines(newdata_diff_N,effect_diff_N2-1.96*se_fitt_N2,lwd=2,lty=2)

lines(newdata_diff_N,effect_diff_N3,lwd=2,col="red")
lines(newdata_diff_N,effect_diff_N3+1.96*se_fitt_N3,lwd=2,lty=2,col="red")
lines(newdata_diff_N,effect_diff_N3-1.96*se_fitt_N3,lwd=2,lty=2,col="red")
axis(1);axis(2)
abline(h=0,lty=3,col="dark red")

###########################################################


newdata_diff_dist<-sort(unique(data$ln_dist_change))
newdata_2<-data.frame(rep(0,length(newdata_diff_dist)),newdata_diff_dist, rep(1,length(newdata_diff_dist)),rep(0,length(newdata_diff_dist)), 
                      rep(0,length(newdata_diff_dist)),rep(0,length(newdata_diff_dist)),rep(0,length(newdata_diff_dist)) )

names(newdata_2)<-c("ln_diff_N","ln_dist_change","moved","consolidated","dist_damage_min","dist_power_min","diff_abs_early")


se_diff_change2<-predict(ft1,newdata=data.frame(newdata_2),se.fit=T)$se.fit
effect_diff_change2<-predict(ft1,newdata=data.frame(newdata_2)) -coef(ft1)[1]

se_diff_change3<-predict(ft2,newdata=data.frame(newdata_2),se.fit=T)$se.fit
effect_diff_change3<-predict(ft2,newdata=data.frame(newdata_2)) -coef(ft2)[1]


plot(newdata_diff_dist,effect_diff_change2,ylim=c(-.4,.1),xlim=c(-1,6),type="n",
     axes=F,xlab="",ylab="")
grid()
par(new=T)
hist(data[data$moved==1,]$ln_dist_change,breaks=20,axes=F,xlim=c(-1,6),border=F,
     xlab="log(Distance|Realized)-log(Distance|Assigned)",
     ylab="",main="Effect of Distance for Reassigned Voters")
par(new=T)
plot(newdata_diff_dist,effect_diff_change2,axes=F,lwd=2,xlab="",ylab="",type="l",
     ylim=c(-.4,.1),xlim=c(-1,6)
  )
lines(newdata_diff_dist,effect_diff_change2+1.96*se_diff_change2,lwd=2,lty=2)
lines(newdata_diff_dist,effect_diff_change2-1.96*se_diff_change2,lwd=2,lty=2)

lines(newdata_diff_dist,effect_diff_change3,lwd=2,col="red")
lines(newdata_diff_dist,effect_diff_change3+1.96*se_diff_change3,lwd=2,lty=2,col="red")
lines(newdata_diff_dist,effect_diff_change3-1.96*se_diff_change3,lwd=2,lty=2,col="red")
axis(1);axis(2)
text(5.25,.05,"3rd Order",col="red",cex=.5)
text(5.25,-.2,"2nd Order",col="black",cex=.5)
abline(h=0,lty=3,col="dark red")


#######################################################################


newdata_diff_N2<-sort(unique(data$lev_diff_N))
newdata_diff_N2<-newdata_diff_N2[newdata_diff_N2 >0]
names(newdata_diff_N2)<-c("lev_diff_N")
newdata_1a<-data.frame(newdata_diff_N2,rep(0, length(newdata_diff_N2)), rep(0,length(newdata_diff_N2)),rep(1,length(newdata_diff_N2)), 
                      rep(0,length(newdata_diff_N2)),rep(0,length(newdata_diff_N2)),rep(0,length(newdata_diff_N2)) )

names(newdata_1a)<-c("lev_diff_N","lev_dist_change","moved","consolidated","dist_damage_min","dist_power_min","diff_abs_early")

se_fitt2_N2<-predict(ft3,newdata=data.frame(newdata_1a),se.fit=T)$se.fit
effect_diff2_N2<-predict(ft3,newdata=data.frame(newdata_1a)) -coef(ft1)[1]

se_fitt2_N3<-predict(ft4,newdata=data.frame(newdata_1a),se.fit=T)$se.fit
effect_diff2_N3<-predict(ft4,newdata=data.frame(newdata_1a)) -coef(ft4)[1]


plot(newdata_diff_N2,effect_diff2_N2,ylim=c(-.25,.05),xlim=c(0,30000),type="n",
     axes=F,xlab="",ylab="")
grid()
par(new=T)
hist(data[data$consolidated==1,]$lev_diff_N,breaks=40,axes=F,xlim=c(0,30000),border=F,
     xlab="Size|Realized-Size|Assigned",
     ylab="",main="")

par(new=T)
plot(newdata_diff_N2,effect_diff2_N2,axes=F,lwd=2,xlab="",ylab="",type="l",
     ylim=c(-.25,.05),xlim=c(0,30000)
)
lines(newdata_diff_N2,effect_diff2_N2+1.96*se_fitt_N2,lwd=2,lty=2)
lines(newdata_diff_N2,effect_diff2_N2-1.96*se_fitt_N2,lwd=2,lty=2)

lines(newdata_diff_N2,effect_diff2_N3,lwd=2,col="red")
lines(newdata_diff_N2,effect_diff2_N3+1.96*se_fitt_N3,lwd=2,lty=2,col="red")
lines(newdata_diff_N2,effect_diff2_N3-1.96*se_fitt_N3,lwd=2,lty=2,col="red")
axis(1);axis(2)
abline(h=0,lty=3,col="dark red")


###########################################################


newdata_diff_dist2<-sort(unique(data$lev_dist_change))
newdata_2a<-data.frame(rep(0,length(newdata_diff_dist2)),newdata_diff_dist2, rep(1,length(newdata_diff_dist2)),rep(0,length(newdata_diff_dist2)), 
                      rep(0,length(newdata_diff_dist2)),rep(0,length(newdata_diff_dist2)),rep(0,length(newdata_diff_dist2)) )

names(newdata_2a)<-c("lev_diff_N","lev_dist_change","moved","consolidated","dist_damage_min","dist_power_min","diff_abs_early")


se_diff2_change2<-predict(ft3,newdata=data.frame(newdata_2a),se.fit=T)$se.fit
effect_diff2_change2<-predict(ft3,newdata=data.frame(newdata_2a)) -coef(ft3)[1]

se_diff2_change3<-predict(ft4,newdata=data.frame(newdata_2a),se.fit=T)$se.fit
effect_diff2_change3<-predict(ft4,newdata=data.frame(newdata_2a)) -coef(ft4)[1]


plot(newdata_diff_dist2,effect_diff2_change2,ylim=c(-.4,.4),xlim=c(-10,15),type="n",
     axes=F,xlab="",ylab="")
grid()
par(new=T)
hist(data[data$moved==1,]$lev_dist_change,breaks=100,axes=F,border=F,
     xlab="Distance|Realized-Distance|Assigned",xlim=c(-10,15),
     ylab="",main="")
par(new=T)
plot(newdata_diff_dist2,effect_diff2_change2,axes=F,lwd=2,xlab="",ylab="",type="l",
     ylim=c(-.4,.4),xlim=c(-10,15)
)
lines(newdata_diff_dist2,effect_diff2_change2+1.96*se_diff2_change2,lwd=2,lty=2)
lines(newdata_diff_dist2,effect_diff2_change2-1.96*se_diff2_change2,lwd=2,lty=2)

lines(newdata_diff_dist2,effect_diff2_change3,lwd=2,col="red")
lines(newdata_diff_dist2,effect_diff2_change3+1.96*se_diff2_change3,lwd=2,lty=2,col="red")
lines(newdata_diff_dist2,effect_diff2_change3-1.96*se_diff2_change3,lwd=2,lty=2,col="red")
axis(1);axis(2)
abline(h=0,lty=3,col="dark red")

dev.off()

